#delimit ;

* Session setup: turn off output paging, empty the data in memory, and make ;
* the replication folder the working directory so the relative data and ;
* output paths used by this script resolve against it ;
set more off ;
clear ;
cd "replication" ;

* *************************************************************************** ;
* load data ;
* *************************************************************************** ;

* Read the endline file, replacing whatever is currently in memory ;
use "./data/endline.dta", clear ;

* *************************************************************************** ;
* merge with baseline to get completed_elem (used later for hetero analysis)
* *************************************************************************** ;

* Bring in completed_elem from the baseline file, matching one-to-one on Id ;
merge 1:1 Id using "./data/baseline.dta", keepusing(completed_elem) ;

* Stop if any endline observation has no baseline record. Only using-only ;
* and matched rows are allowed ;
assert inlist(_merge, 2, 3) ;

* Using-only rows are respondents with baseline but no endline. Keep only ;
* the matched rows and discard the merge indicator ;
keep if _merge == 3 ;
drop _merge ;

* *************************************************************************** ;
* construct outcome variables 
* *************************************************************************** ;

* Item variables grouped by module. Each global lists the items whose row ;
* mean forms the score for that module ;
global module1_components "wash_defecation wash_eating wash_cooking" ;
global module23_components "pregnancy_dai_certificate pregnancy_breastmilk diar_know" ;
global module4_components "aids_heard aids_blood aids_sex aids_touch aids_needle aids_pregnant aids_pregnant_baby aids_curable condom_hiv condom_std condom_pregnancy condom_disease" ;
global module5_components "nightblindness_heard nightblindness_nutrition nightblindness_curable" ;

* Overall score: row mean across all 21 items ;
egen module_all = rowmean($module1_components $module23_components $module4_components $module5_components) ;

* Module scores: row mean over the items of each module, created in the ;
* order 1, 23, 4, 5 ;
foreach m in 1 23 4 5 { ;
	egen module`m'_know = rowmean(${module`m'_components}) ;
} ;

* Consistency check: the item-count-weighted average of the module scores ;
* with weights 3, 3, 12 and 3 out of 21 must equal the overall score to ;
* three decimals ;
assert round((3*module1_know + 3*module23_know + 12*module4_know + 3*module5_know)/21, 0.001) == round(module_all, 0.001) ;

****************************************************************************** ;
* generate randomization strata and wave-class variable
****************************************************************************** ;

* One group id per combination of wave, neighborhood, female and mfi. This ;
* is the variable absorbed as fixed effects in the regressions ;
egen strata = group(wave neighborhood female mfi) ;

* One group id per wave-by-class combination ;
* NOTE(review): waveclass is not referenced in the rest of this script - confirm whether it is still needed ;
egen waveclass = group(wave class) ;

****************************************************************************** ;
* label variables for the LaTeX tables ;
****************************************************************************** ;

* Treatment indicators: these labels become the row names of the table ;
label variable healthonly "HEE" ;
label variable healthandpay "HEEC" ;

* Outcome labels: the overall score gets a blank label and each module ;
* score gets a stacked multi-line LaTeX label ;
label variable module_all " " ;
label variable module1_know "\shortstack[l]{Cleanli-\\ness and \\ Hygiene}" ;
label variable module23_know "\shortstack[l]{Midwives,\\Maternal \\ \& Child \\ Health}" ;
label variable module4_know "\shortstack[l]{Condom,\\ AIDS, \\Syphilis}" ;
label variable module5_know "\shortstack[l]{Night \\ blindness}" ;

****************************************************************************** ;
* regressions
****************************************************************************** ;

* Remove stored results matching module* from an earlier run. capture ;
* suppresses the error when none exist ;
capture estimates drop module* ;

foreach outcome in module_all module1_know module23_know module4_know module5_know { ;

	* Regress the score on both treatment dummies, absorbing strata fixed ;
	* effects, with robust standard errors. Store under the outcome name ;
	areg `outcome' healthonly healthandpay, absorb(strata) vce(robust) ;
	quietly estimates store `outcome' ;

	* Test equality of the two treatment coefficients. titlerow is an empty ;
	* statistic that serves as a header row for the F-test lines in the table ;
	test healthonly = healthandpay ;
	estadd local titlerow = "" ;
	estadd scalar fstat = r(F): `outcome' ;
	estadd scalar pval = r(p): `outcome' ;

	* Mean and SD of the outcome among control observations that are in ;
	* the estimation sample ;
	summarize `outcome' if control == 1 & e(sample) == 1 ;
	estadd scalar cmean = r(mean): `outcome' ;
	estadd scalar csd = r(sd): `outcome' ;

} ;

* Write the five stored models to a LaTeX table: coefficients with stars and ;
* parenthesised standard errors, then the F-test of HEE = HEEC, the control ;
* mean and SD, and N. Column 1 is grouped as All Topics and columns 2 to 5 ;
* as By Topic. The table note lists wave among the strata variables because ;
* strata is built from wave, neighborhood, female and mfi ;
esttab module* using "./output/table-endline-knowledge-aggregate.tex",
	replace
	drop(_cons) 
	cells(b(label() star fmt(%9.3f %9.3f)) se(par)) 
	star(* 0.10 ** 0.05 *** 0.01) 
	stats(titlerow fstat pval cmean csd N, fmt(%9s %9.3f %9.3f %9.3f %9.3f %9.0f) labels("\(F\)-test, HEE = HEEC" "$\qquad$ \(F\)-statistic" "$\qquad$ \(p\)-value" "Control Mean" "Control SD" "N"))
	prehead(\begin{table}[htbp] \centering \normalsize `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
		\captionsetup{justification=centering} 
		\caption{Effects on Longer-Term Health Knowledge}
		\label{table-endline-knowledge-aggregate}
		\begin{tabular*}{0.9\hsize}{p{4.5cm}p{1.5cm}p{1.5cm}p{1.5cm}p{1.5cm}p{1.5cm}}
		\toprule)
	legend label  booktabs  collabels( , none)
	mgroups(
	"All Topics" 
	"By Topic" 
	, pattern(1 1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))
	postfoot(`"\bottomrule"'  \end{tabular*} \captionsetup{justification=justified, width=0.9\hsize} 
		\caption*{\footnotesize \textit{Notes:} 
		Outcomes are from an endline survey conducted approximately 10 months after the final film screening.
		The dependent variables are the proportion of correct answers in all health 
		knowledge questions (Column 1) or in a given subset of questions by topic (Columns 2 to 5).
		Regression results for individual questions are presented in 
		Supplementary Material Tables \ref{table-endline-knowledge-by-question-modules-1-2-3-5} and \ref{table-endline-knowledge-by-question-modules-4}. 
		\textit{HEE} is a dummy for assignment to only health entertainment-education. 
		\textit{HEEC} is a dummy for assignment to health entertainment-education with cash incentives for test performance. 
		Robust standard errors.
		All regressions include dummies for randomization strata, where strata are defined by wave, gender, neighborhood, and microfinance client status.
		***\$\,p < 0.01$, **\$\,p < 0.05$, *\$\,p<0.10$.
		}
		\end{table}) ;

exit ;



